The goal of this document is to assess the separation of different possible sources of organic matter in isotope space, as well as to filter out the samples that are not representative of likely sources. We will begin by just looking at particle data from Station ALOHA. The organic matter sources we will be considering are:
## SPECIFYING TRACERS TO BE USED IN MODEL ##

## Should amino acid δ15N data be included? (T/F)
include_d15N <- TRUE
## Should amino acid δ13C data be included? (T/F)
include_d13C <- FALSE
## Should essential amino acid δ13C data be mean normalized?
meannorm_d13C <- FALSE

# If using δ15N values in model (the flags are logical, so no `== TRUE` needed)
if (include_d15N) {
  ## Which amino acid δ15N values should be included in the model? (should be
  ## consistent with column names in data file)
  tracers_d15N <- c("d15NGlx","d15NAla","d15NAsx","d15NIle","d15NLeu","d15NPro","d15NVal","d15NSer","d15NGly","d15NLys","d15NPhe","d15NThr")
  ## Uncertainty columns follow the convention "SD" + tracer name, so derive
  ## them instead of retyping the list (avoids copy-paste typos)
  SDtracers_d15N <- paste0("SD", tracers_d15N)
} else {
  tracers_d15N <- c()
  SDtracers_d15N <- c()
}

# If using δ13C values in model
if (include_d13C) {
  ## Which essential amino acid δ13C values should be included in the model?
  ## (should be consistent with column names in data file)
  tracers_d13C <- c()
  ## What columns in the data file contain the uncertainties in those δ13C values?
  SDtracers_d13C <- c()
} else {
  tracers_d13C <- c()
  SDtracers_d13C <- c()
}

## Now we'll make one vector containing all of the tracers to be used in the model
tracers_all <- c(tracers_d15N, tracers_d13C)
SDtracers_all <- c(SDtracers_d15N, SDtracers_d13C)
## We'll also specify other subsets of tracers to be assessed
# "Source" amino acid tracers only
tracers_SAA <- c("d15NSer","d15NGly","d15NLys","d15NPhe")
SDtracers_SAA <- paste0("SD", tracers_SAA)
# Reduced "select" tracer set
tracers_select <- c("d15NLys","d15NPhe","d15NThr")
# FIX: the first entry was hand-typed as "Sd15NLys" (missing the "D"), which
# matches no column in the data file. Deriving the SD names from the tracer
# names guarantees they stay consistent.
SDtracers_select <- paste0("SD", tracers_select)
tracers_complete <- c(tracers_select)
SDtracers_complete <- c(SDtracers_select)
## Code in chunks below will generally refer to the vector "tracers."
## this is done to make code easily portable, but we must update the "tracers"
## vector to reflect the current tracers of interest in a given portion of code
# Default working tracer set: all tracers defined above
tracers <- tracers_all
SDtracers <- SDtracers_all
# Last we'll define some generic groups of AAs that may be useful later on.
# Update to reflect the column names in your data
# all amino acids (plus the "SAA" and "EAA" summary columns)
allAA = c("Ala", "Gly", "Thr", "Ser", "Val", "Leu", "Ile", "Pro", "Asx", "Met", "Glx", "Phe", "Tyr", "Lys", "SAA", "EAA")
# columns with SD
SDallAA <- c("SDAla", "SDGly", "SDThr", "SDSer", "SDVal", "SDLeu", "SDIle", "SDPro", "SDAsx", "SDMet", "SDGlx", "SDPhe", "SDTyr", "SDLys", "SDSAA", "SDEAA")
# source amino acids
srcAA = c("Phe","Gly","Ser","Lys")
# columns with SD
SDsrcAA = c("SDPhe","SDGly","SDSer","SDLys")
# trophic amino acids
trAA = c("Glx", "Asx", "Ala", "Ile", "Leu", "Pro", "Val","Thr")
# columns with SD
SDtrAA = c("SDGlx", "SDAsx", "SDAla", "SDIle", "SDLeu", "SDPro", "SDVal","SDThr")
# All amino acids - sorted by SAA TAA and Thr
# Used later as the factor-level order for long-format AA plots.
# NOTE(review): "SAA"/"SDSAA" appear in allAA/SDallAA but not here — confirm
# whether that omission is intentional.
allAA.ord <- c("Glx", "SDGlx", "Ala", "SDAla", "Asx", "SDAsx", "Ile", "SDIle", "Leu", "SDLeu", "Pro", "SDPro", "Val", "SDVal", "Ser", "SDSer", "Gly", "SDGly", "Tyr", "SDTyr", "Lys", "SDLys", "Met", "SDMet", "Phe", "SDPhe", "Thr", "SDThr","EAA","SDEAA")
## δ13C and δ15N data of organic matter sources should be stored in one, single
## .xlsx file.
## Columns containing amino acid δ-values and the associated standard
## deviations should match those defined in the chunk above.
# Indicate the name of the column describing the organic matter source to which
# each sample belongs
Source_Variable <- "Group"
# Indicate the names representing each possible source of organic matter in the
# order in which you would like them referenced
Sources <- c("Surface","Large","Small","Submicron")
# List the name of any additional variables that should be stored for analysis
Additional_variables <- c("Location","Epoch","Event","Size","Sizecat","Depth","Type")
# Full set of metadata columns carried through every subset/melt below
variables <- c(Source_Variable, Additional_variables)
## IMPORTING DATA ##
# Define the location of .xlsx file, and sheet name if multiple sheets are
# present. If additional data beyond organic matter sources is present in this
# file that is ok. We will subset the data below.
# Read the summer and winter cruise files and stack them; only the metadata,
# tracer, and tracer-SD columns are retained.
Data.all <- rbind(
read_excel("Data/AA-CSIA_ALOHA_Summer.xlsx",
sheet = "d15N")[c(variables,tracers,SDtracers)],
read_excel("Data/AA-CSIA_ALOHA_Winter.xlsx",
sheet = "d15N")[c(variables,tracers,SDtracers)]
)
# Defining the preferred order in which to reference organic matter sources
# ("Mixed" and "Taxa" are zooplankton categories kept as additional levels)
Data.all[[Source_Variable]] <- factor(Data.all[[Source_Variable]], levels = c(Sources,"Mixed","Taxa"))
# Particles and sediment-trap samples are the candidate organic matter sources
Data.sources <- subset(Data.all, Type == "Particle" | Type == "Trap")
# Group assignments are (re)defined in the filtering chunks below
Data.sources$Group <- factor(NA, levels = Sources)
The first thing we need to do is figure out what is the appropriate minimum depth cutoff for “deep” particles. We want \(\mathrm{\delta^{15}\text{N}}\) values to be essentially invariant with depth below this cutoff, so let’s have a look at depth trends in particle \(\mathrm{\delta^{15}\text{N}}\) values.
# Reshape to long format: one row per (sample, measured column) pair
Data.sources.long <-
melt(
Data.sources,
id.vars = variables,
variable.name = "Tracer",
value.name = "Value"
)
# melt() also stacked the SD columns, so keep only the tracer values
Data.sources.long <- Data.sources.long[which(Data.sources.long$Tracer %in% tracers),]
# Full-depth profile of each tracer, colored by particle size class
ggplot(data = Data.sources.long)+
geom_point(
aes(x=Value, y=Depth, color=Sizecat)
)+
facet_wrap(~Tracer, nrow=2, scales = "free_x")+
scale_y_reverse()
Let’s zoom in on the top 500 m.
# Same long-format reshape as the previous chunk
Data.sources.long <-
melt(
Data.sources,
id.vars = variables,
variable.name = "Tracer",
value.name = "Value"
)
Data.sources.long <- Data.sources.long[which(Data.sources.long$Tracer %in% tracers),]
# Same depth-profile plot, zoomed (via coord_cartesian, which does not drop
# data) to the upper 500 m
ggplot(data = Data.sources.long)+
geom_point(
aes(x=Value, y=Depth, color=Sizecat)
)+
coord_cartesian(ylim = c(500,0))+
facet_wrap(~Tracer, nrow=2, scales = "free_x")+
scale_y_reverse()
These plots show us that the data becomes mostly invariant with depth at and below about 200 m.
## FILTERING DATA ##
# First pass at assigning source groups, using the ~200 m cutoff identified
# from the depth profiles above.
Data.sources <- subset(Data.all, Type == "Particle" | Type == "Trap")
Data.sources$Group <- factor(NA, levels = Sources)
# Shallow samples (< 100 m) of any size class are "Surface"
Data.sources$Group[
which(Data.sources$Depth < 100)
] <- "Surface"
# Deep (> 190 m) large particles and trap material are "Large"
Data.sources$Group[
which(Data.sources$Depth > 190 &
(Data.sources$Sizecat == "Large" | Data.sources$Sizecat == "Trap"))
] <- "Large"
Data.sources$Group[
which(Data.sources$Depth > 190 & Data.sources$Sizecat == "Small")
] <- "Small"
Data.sources$Group[
which(Data.sources$Depth > 190 & Data.sources$Sizecat == "Submicron")
] <- "Submicron"
# NOTE(review): samples between 100 and 190 m match no rule and are dropped
# here — presumably an intentional transition-zone exclusion; confirm.
Data.sources <- subset(Data.sources, !is.na(Group))
# Zooplankton samples ("Mixed" tows and sorted "Taxa") for comparison
Data.zoops <- subset(Data.all,
Group == "Mixed" | Group == "Taxa")
Next we need to decide which particle samples represent likely sources of organic matter to the zooplankton food web and which do not. We will do this by comparing the \(\mathrm{\delta^{15}\text{N}}\) values of Phe and Lys between zooplankton and particles.
First let’s take a look at all our organic matter sources and decide if there are any we can exclude right off the bat.
# Particle sources (colored, larger points) overlaid with zooplankton
# (shape-coded, smaller points) in Phe vs Lys d15N space.
ggplot() +
  geom_point(data = Data.sources, size = 3,
             aes(x = d15NPhe, y = d15NLys, color = Group)) +
  geom_point(data = Data.zoops, size = 2,
             aes(x = d15NPhe, y = d15NLys, shape = Size)) +
  labs(shape = "Zooplankton",
       color = "Particles",
       y = expression(delta^{15}*N[" Lys"]),
       x = expression(delta^{15}*N[" Phe"]))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
# Same source-vs-zooplankton comparison, with Thr on the y-axis instead of Lys
ggplot()+
geom_point(data = Data.sources,
aes(x=d15NPhe, y=d15NThr, color=Group),
size = 3)+
geom_point(data = Data.zoops,
aes(x=d15NPhe, y=d15NThr, shape=Size),
size = 2)+
labs(shape="Zooplankton",
color="Particles")+
ylab(expression(delta^{15}*N[" Thr"]))+
xlab(expression(delta^{15}*N[" Phe"]))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).
This suggests that submicron particles are probably not relevant to organic matter supply and can thus be excluded from our analysis. Small particles are also not likely to be very important in most samples, but because they are more abundant and strongly implicated in previous works we will retain them.
Now let’s have a look at surface particles and zooplankton and see if there is some subset of surface particles that are more relevant to our study.
# Surface particles (colored by size fraction) against shallow (< 50 m)
# zooplankton, with zooplankton shape encoding both size class and sample type
ggplot()+
geom_point(data = subset(Data.sources,
Group == "Surface"),
aes(x=d15NPhe, y=d15NLys, color=Size),
size = 3)+
geom_point(data = subset(Data.zoops,
(Depth < 50)),
aes(x=d15NPhe, y=d15NLys, shape=interaction(Size,Type)),
size = 2)+
labs(shape="Zooplankton",
color="Particles")+
ylab(expression(delta^{15}*N[" Lys"]))+
xlab(expression(delta^{15}*N[" Phe"]))
There is really just one >53 μm particle that we can exclude for having a phenylalanine \(\mathrm{\delta^{15}\text{N}}\) value that is higher than all surface zooplankton samples.
## FILTERING DATA ##
# Final filtering pass: relative to the first pass, the Submicron group is
# dropped entirely and the one surface particle with anomalously high Phe
# d15N is excluded via the d15NPhe < 0 condition.
Data.sources <- subset(Data.all, Type == "Particle" | Type == "Trap")
Data.sources$Group <- factor(NA, levels = Sources)
Data.sources$Group[
which(Data.sources$Depth < 100 & Data.sources$d15NPhe < 0)
] <- "Surface"
Data.sources$Group[
which(Data.sources$Depth > 190 &
(Data.sources$Sizecat == "Large" | Data.sources$Sizecat == "Trap"))
] <- "Large"
Data.sources$Group[
which(Data.sources$Depth > 190 & Data.sources$Sizecat == "Small")
] <- "Small"
# Ungrouped samples (including all Submicron particles) are discarded
Data.sources <- subset(Data.sources, !is.na(Group))
## PLOTTING ##
# Re-plot the (now filtered) surface particles against zooplankton; the
# zooplankton depth cutoff here is 100 m (it was 50 m in the earlier plot)
ggplot()+
geom_point(data = subset(Data.sources,
Group == "Surface"),
aes(x=d15NPhe, y=d15NLys, color=Size),
size = 3)+
geom_point(data = subset(Data.zoops,
(Depth < 100)),
aes(x=d15NPhe, y=d15NLys, shape=interaction(Size,Type)),
size = 2)+
labs(shape="Zooplankton",
color="Particles")+
ylab(expression(delta^{15}*N[" Lys"]))+
xlab(expression(delta^{15}*N[" Phe"]))
# All retained source groups vs zooplankton in Phe-Lys d15N space.
ggplot() +
  geom_point(data = Data.sources, size = 3,
             aes(x = d15NPhe, y = d15NLys, color = Group)) +
  geom_point(data = Data.zoops, size = 2,
             aes(x = d15NPhe, y = d15NLys, shape = Size)) +
  labs(shape = "Zooplankton",
       color = "Particles",
       y = expression(delta^{15}*N[" Lys"]),
       x = expression(delta^{15}*N[" Phe"]))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
Given the small amount of trophic discrimination we expect to see in Lys these data makes sense, with the majority of zooplankton falling within the particle phenylalanine \(\mathrm{\delta^{15}\text{N}}\) domain.
Next we need to deal with any gaps that exist in our data. Here we will just omit any samples for which all tracer data is not available.
## DEALING WITH GAPS IN THE DATA ##
# Should representative samples of organic matter end members be simulated
# from available data? (useful if a large number of samples have only δ13C or
# δ15N data). If FALSE, samples without values for all tracers will be excluded.
simulate_rep <- FALSE # SHOULD CHOOSE FALSE BY DEFAULT
# Keep the unfiltered table; both branches aggregate from it
Data.sources_orig <- Data.sources
## if we chose not to simulate representative samples...
if (simulate_rep == FALSE) {
# NOTE(review): only the metadata + tracer columns are kept here, so the SD
# columns are dropped from Data.sources — confirm later chunks don't need them
Data.sources <- na.omit(Data.sources_orig[c(variables,tracers)]) # all samples containing NAs will be removed from data
## we will also find the mean tracer value for each source group
src.mn <- aggregate(Data.sources_orig[tracers], # aggregate source data
by=list(Group = Data.sources_orig[[Source_Variable]]), # by organic matter source group
FUN = mean, na.rm=TRUE)[-1] # taking a mean
## if we chose to simulate representative samples...
} else {
## first we will find the mean tracer value for each source group
src.mn <- aggregate(Data.sources_orig[tracers], # aggregate source data
by=list(Group = Data.sources_orig[[Source_Variable]]), # by organic matter source group
FUN = mean, na.rm=TRUE)[-1] # taking a mean
## Next we will find the most conservative estimate of tracer variance within
## each source group
# we will calculate the standard deviation within each population
src.SD.pop <- aggregate(Data.sources_orig[tracers], # aggregate source data
by=list(Group = Data.sources_orig[[Source_Variable]]), # by organic matter source group
FUN = sd, na.rm=TRUE)[-1] # calculating SD within the population
# rename so pmax() below aligns these columns with src.SD.prop's
colnames(src.SD.pop) <- c(SDtracers)
# we will also propagate analytical uncertainty through the averaging equation
# NOTE(review): SDmean is a helper defined elsewhere in the project
src.SD.prop <- aggregate(Data.sources_orig[SDtracers], # aggregate source data SDs
by=list(Group = Data.sources_orig[[Source_Variable]]), # by organic matter source group
FUN = SDmean, na.rm=TRUE)[-1] # calculating the SD propagated through the averaging equation
# and we will select the greater of the two estimates of uncertainty
src.SD <- pmax(src.SD.pop, src.SD.prop, na.rm=TRUE)
## Now we will simulate n samples for each organic matter source group
nsams <- 10 # number of samples to simulate from each source
Data.sources <- data.frame(matrix(nrow = nsams*length(Sources), ncol = length(tracers)+1)) # initializing empty data frame
colnames(Data.sources) <- c(tracers,Source_Variable) # naming columns
set.seed(123) # set seed for repeatable sample synthesis
for (i in 1:length(tracers)) { # loop over each tracer
# rnorm() recycles the per-group `mean` and `sd` vectors across the
# nsams*length(Sources) draws, so draws cycle through the sources in order
Data.sources[[tracers[i]]] <- rnorm(n = nsams*length(Sources), # pulling n samples from a normal distribution
mean = src.mn[[tracers[i]]], # with a given mean
sd = src.SD[[SDtracers[i]]]) # and given SD
}
# Fill the metadata columns with placeholder sequence values
for (i in variables) {
Data.sources[[i]] <- seq(1,nsams*length(Sources))
}
# rep(Sources, nsams) repeats the source vector whole, matching the
# cycling order of the recycled rnorm() draws above
Data.sources[[Source_Variable]] <- rep(Sources,nsams)
}
Now we will do a series of statistical tests on our data. We will look at three specific tracer sets:
We’ll want to calculate Pearson’s correlation coefficients for all tracer pairs, and then for each set of tracers we’ll calculate the effective dimensionality, and fit PCAs and LDAs.
Here’s the data we are able to work with for these sources of organic matter.
# Print the final filtered source dataset as a markdown table
kable(Data.sources, format="pipe")
| Group | Location | Epoch | Event | Size | Sizecat | Depth | Type | d15NGlx | d15NAla | d15NAsx | d15NIle | d15NLeu | d15NPro | d15NVal | d15NSer | d15NGly | d15NLys | d15NPhe | d15NThr |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Surface | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 25.0 | Particle | 1.484829 | 0.2621566 | 0.6311993 | 0.3282588 | -0.8689297 | 1.3601086 | 0.5503920 | -3.5679724 | -2.7768154 | -3.3241724 | -1.7010684 | -3.2925741 |
| Surface | ALOHA S | Summer | Pump Cast | 1-53 μm | Small | 50.0 | Particle | 2.044472 | 3.4539394 | 1.0298898 | 1.7623601 | -0.9956259 | 0.8069006 | 2.0269545 | -3.9616378 | -1.6631337 | -3.9014198 | -2.9171287 | -3.1572709 |
| Surface | ALOHA S | Summer | Pump Cast | 1-53 μm | Small | 75.0 | Particle | 3.250667 | 3.6536667 | 2.0230000 | 1.5026667 | 0.3960000 | 0.8923333 | 3.6500000 | -1.9240000 | -0.1750000 | -1.0910000 | -0.7136667 | -2.2276667 |
| Small | ALOHA S | Summer | Pump Cast | 1-53 μm | Small | 200.0 | Particle | 13.880354 | 14.1489502 | 10.8244576 | 9.4036231 | 9.3201721 | 9.9939215 | 6.2563283 | 4.5728580 | 6.0175357 | 3.7900082 | 4.8152633 | 0.3038518 |
| Small | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 247.5 | Particle | 14.699000 | 14.2663019 | 10.9235018 | 11.1843429 | 9.4458805 | 10.3536870 | 11.6212750 | 5.9833180 | 4.5618276 | 5.8408147 | 6.3369334 | 1.2524864 |
| Small | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 402.5 | Particle | 16.041232 | 15.1469502 | 12.5842852 | 11.5994349 | 9.4906657 | 10.1706786 | 8.9069479 | 4.6910557 | 7.2411040 | 5.7922405 | 7.5999218 | 1.5328477 |
| Small | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 600.0 | Particle | 16.084200 | 15.8286057 | 12.1225680 | 13.4255673 | 10.3490486 | 10.6624084 | 14.5783106 | 7.1154815 | 6.0129585 | 7.5881735 | 7.5178318 | 2.4000000 |
| Small | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 850.0 | Particle | 15.462667 | 15.5673333 | 12.3956667 | 10.9023333 | 10.5886667 | 11.2226667 | 12.9753333 | 7.4873333 | 6.0143333 | 6.8370000 | 7.4163333 | 2.4030000 |
| Small | ALOHA S | Summer | Pump Cast | 0.7-53 μm | Small | 1200.0 | Particle | 15.833715 | 16.3456049 | 12.5943486 | 12.1666423 | 9.7894775 | 10.7412939 | 14.3863024 | 6.8556156 | 6.3851387 | 6.6023863 | 7.0300927 | 3.4594236 |
| Surface | ALOHA S | Summer | Pump Cast | >53 μm | Large | 25.0 | Particle | 1.167459 | 3.1290725 | -0.0246575 | 0.2931710 | 0.9471575 | -0.2311250 | 1.7367140 | -4.1415565 | 0.6974598 | -1.7106356 | -3.0397050 | 0.5753777 |
| Large | ALOHA S | Summer | Pump Cast | >53 μm | Large | 245.0 | Particle | 8.292695 | 9.4215170 | 5.4600401 | 5.7175542 | 5.8033922 | 5.9414570 | 6.1301973 | 0.9230347 | 3.6817450 | 1.5502729 | 0.3899082 | -2.9458818 |
| Large | ALOHA S | Summer | Pump Cast | >53 μm | Large | 400.0 | Particle | 12.143149 | 11.0651517 | 7.9297650 | 8.9995456 | 8.7545694 | 7.9349008 | 9.3277212 | 2.5105021 | 3.9978206 | 0.8965012 | 1.5617544 | -3.9677564 |
| Large | ALOHA S | Summer | Pump Cast | >53 μm | Large | 600.0 | Particle | 12.181353 | 9.9185662 | 8.7742397 | 8.7404411 | 8.4962629 | 8.5514444 | 8.1667259 | 2.2735137 | 2.6139146 | 2.8256736 | 2.8672322 | -4.8145505 |
| Large | ALOHA S | Summer | Pump Cast | >53 μm | Large | 1200.0 | Particle | 10.343974 | 10.6392268 | 7.2430510 | 6.8627782 | 7.4888370 | 6.6870695 | 7.2719063 | 3.3419059 | 3.8087448 | 1.3385177 | 2.7256373 | -3.7219956 |
| Surface | ALOHA W | Winter | Pump Cast | 0.2-0.7 μm | Submicron | 25.0 | Particle | 2.322083 | 2.4471667 | 1.6890833 | -1.1169167 | -1.3130833 | 3.9470000 | 0.8112500 | -3.5631667 | -3.0676667 | -1.4660000 | -0.9154167 | -0.2457500 |
| Surface | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 25.0 | Particle | 2.027155 | 2.8039849 | 1.1273515 | 0.5149970 | 0.5613993 | 3.0583099 | 1.4477369 | -3.3370298 | -1.6066036 | -1.1066867 | -1.2505243 | -2.0367443 |
| Surface | ALOHA W | Winter | Pump Cast | 1-53 μm | Small | 50.0 | Particle | 2.166333 | 2.5840000 | 1.5290000 | 0.5430000 | -0.6033333 | 0.9650000 | 0.6343333 | -3.5643333 | -0.9803333 | -1.5170000 | -0.6526667 | -2.1916667 |
| Surface | ALOHA W | Winter | Pump Cast | 1-53 μm | Small | 75.0 | Particle | 2.060727 | 1.7198007 | 0.9348772 | -0.1906121 | -1.6836965 | -0.5200061 | -2.2163488 | -3.6222195 | -1.9154255 | -2.5226780 | -3.1584011 | -0.7535370 |
| Small | ALOHA W | Winter | Pump Cast | 1-53 μm | Small | 200.0 | Particle | 13.442670 | 14.5217861 | 9.6566291 | 9.3731502 | 8.4201415 | 7.9392462 | 8.2062151 | 5.2432115 | 4.3260328 | 3.6740667 | 4.1409110 | -0.6041100 |
| Small | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 250.0 | Particle | 13.092000 | 18.3280000 | 10.6310000 | 9.6893333 | 10.1280000 | 11.2886667 | 5.2836667 | 6.5903333 | 7.0386667 | 5.8466667 | 6.6136667 | 3.6270000 |
| Small | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 400.0 | Particle | 14.424954 | 15.5065000 | 11.2179758 | 10.3030000 | 9.4642994 | 10.8963544 | 12.2105000 | 6.8052500 | 6.1265000 | 5.4328426 | 6.4251383 | 3.5220000 |
| Small | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 600.0 | Particle | 15.240750 | 16.3385000 | 12.5092500 | 12.8867500 | 10.8735000 | 11.0805000 | 13.0555000 | 6.8430000 | 5.9295000 | 4.8970000 | 6.5962500 | 3.6400000 |
| Small | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 850.0 | Particle | 15.025667 | 15.1166667 | 11.5986667 | 10.9413333 | 9.5676667 | 10.4040000 | 14.1173333 | 6.9376667 | 5.4343333 | 6.4146667 | 7.4680000 | 3.0073333 |
| Small | ALOHA W | Winter | Pump Cast | 0.7-53 μm | Small | 1195.0 | Particle | 14.499500 | 16.6225000 | 12.3325000 | 11.3030000 | 9.9117500 | 10.8220000 | 11.9137500 | 5.5637500 | 5.0992500 | 3.8030000 | 7.3565000 | 3.8367500 |
| Large | ALOHA W | Winter | Pump Cast | >53 μm | Large | 250.0 | Particle | 11.467638 | 9.6885269 | 6.7369709 | 7.1564848 | 6.4648852 | 6.6199591 | 5.5892913 | 0.8378478 | 2.1776661 | 3.0323176 | 2.6104768 | -3.5351356 |
| Large | ALOHA W | Winter | Pump Cast | >53 μm | Large | 250.0 | Particle | 14.344732 | 14.0970793 | 9.4382148 | 13.9198995 | 12.1231701 | 8.4286556 | 7.8475857 | 4.2032666 | 4.4954637 | 2.0644772 | 3.7648048 | -7.7372552 |
| Large | ALOHA W | Winter | Pump Cast | >53 μm | Large | 850.0 | Particle | 13.018884 | 10.1461334 | 10.3261097 | 9.3815778 | 8.9776616 | 9.2483129 | 9.2769923 | 0.6273126 | 4.5665259 | 2.0559601 | 3.6494712 | -5.3567263 |
Now lets visualize the \(\mathrm{\delta^{15}\text{N}}\) values of each amino acid in each organic matter source.
# Build a long-format table of tracer values for plotting: one row per
# (sample, amino acid), with an `iso` column distinguishing isotope systems.
if (include_d13C) {
  sources.dC.long <-
    melt(Data.sources, id.vars = c(variables),
         measure.vars = intersect(tracers_d13C, tracers),
         variable.name = "AA")
  # Strip the 4-character "d13C" prefix, leaving the AA abbreviation
  sources.dC.long$AA <- sub('....','',sources.dC.long$AA)
  sources.dC.long$iso <- "d13C"
  if (!include_d15N) {
    sources.long <- sources.dC.long
  }
}
if (include_d15N) {
  sources.dN.long <-
    melt(Data.sources, id.vars = c(variables),
         measure.vars = intersect(tracers_d15N, tracers),
         variable.name = "AA")
  # Strip the 4-character "d15N" prefix, leaving the AA abbreviation
  sources.dN.long$AA <- sub('....','',sources.dN.long$AA)
  sources.dN.long$iso <- "d15N"
  if (!include_d13C) {
    sources.long <- sources.dN.long
  }
}
# FIX: this condition was `include_d13C == include_d15N`, which is also TRUE
# when BOTH flags are FALSE and would then error on the never-created *.long
# objects. Bind the two tables only when both isotope systems are in use.
if (include_d13C && include_d15N) {
  sources.long <- rbind(sources.dN.long, sources.dC.long)
}
# Defining the preferred order in which to reference amino acids
sources.long$AA <- factor(sources.long$AA, levels = allAA.ord)
# Set a light global theme with vertical x labels and no x title/ticks
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95"),
# axis.text.x = element_text(angle = 60, vjust = 1.6, hjust=1.7),
axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()))
# Dot plot of per-AA d15N values, dodged horizontally by source group
AA_plots <-
ggplot(sources.long, aes(x = AA, y = value, color = Group, shape = Group, group=Group))+
geom_point(alpha=0.6, size=1, position = position_dodge(width = 0.8))+
ylab(expression(delta^{15}*N*" (\u2030)"))+
labs(color="Organic Matter Source", shape="Organic Matter Source")+
theme(axis.title.x = element_blank())+
coord_cartesian(ylim = c(-9, 19),expand=FALSE)+
# Vertical separators between amino acid columns
geom_vline(xintercept = seq(0.5,length(sources.long$AA),1), color="grey75", lwd=0.3)
AA_plots
Our first task is to conduct pairwise comparison tests to see which tracers can differentiate which sources. We’ll use PERMANOVA as a multivariate test of whether or not different sources have significantly different \(\mathrm{\delta^{15}\text{N}}\) values. We will use Tukey as a pairwise assessment to see which amino acids alone have different \(\mathrm{\delta^{15}\text{N}}\) values between groups. We’ll run these tests first with source group and season as predictors to see if summer vs winter differences are impacting the data. Expand the code chunk to see permanova results.
# First doing statistical analyses with particle size and season as predictor variables
# PERMANOVA on euclidean distances, terms added sequentially: does tracer
# space differ by source group and/or season (Epoch)?
adonis2(Data.sources[tracers] ~ Group + Epoch, data=Data.sources, method="euclidean", by="terms")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = Data.sources[tracers] ~ Group + Epoch, data = Data.sources, method = "euclidean", by = "terms")
Df SumOfSqs R2 F Pr(>F)
Group 2 5707.7 0.91248 122.6062 0.001 ***
Epoch 1 12.1 0.00194 0.5207 0.498
Residual 23 535.4 0.08559
Total 26 6255.2 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Per-tracer ANOVA (Group + Epoch) with Tukey HSD post-hoc comparisons,
## printed and plotted in a 6x4 grid.
Tukey <- list()
# Titles are drawn manually via mtext(), so the default main title is hidden
par(mar = c(2, 8, 2, 0.5), mfrow = c(6, 4), col.main = "white")
for (i in tracers) {
  AOV <- aov(Data.sources[[i]] ~ Group + Epoch, data = Data.sources)
  print(i)
  print(summary(AOV))
  # Compute the Tukey HSD once and reuse it (the original called TukeyHSD()
  # twice per tracer and left two discarded expressions in the loop body).
  Tukey[[i]] <- TukeyHSD(AOV, conf.level = .95)
  plot(Tukey[[i]], las = 1, sub = i)
  mtext(i, side = 3)
}
[1] "d15NGlx"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 799.9 399.9 255.928 <2e-16 ***
Epoch 1 0.1 0.1 0.087 0.771
Residuals 23 35.9 1.6
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NAla"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 829.1 414.5 261.478 <2e-16 ***
Epoch 1 2.3 2.3 1.423 0.245
Residuals 23 36.5 1.6
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NAsx"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 530.7 265.36 207.861 1.88e-15 ***
Epoch 1 0.4 0.38 0.299 0.59
Residuals 23 29.4 1.28
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NIle"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 561.4 280.68 96.674 6.39e-12 ***
Epoch 1 0.0 0.01 0.003 0.955
Residuals 23 66.8 2.90
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NLeu"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 537.1 268.56 168.916 1.78e-14 ***
Epoch 1 0.2 0.18 0.112 0.741
Residuals 23 36.6 1.59
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NPro"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 408.1 204.06 145.218 9e-14 ***
Epoch 1 1.7 1.68 1.197 0.285
Residuals 23 32.3 1.41
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NVal"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 486.0 242.99 40.268 3.06e-08 ***
Epoch 1 5.1 5.14 0.852 0.366
Residuals 23 138.8 6.03
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NSer"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 450.5 225.25 206.080 2.07e-15 ***
Epoch 1 0.0 0.01 0.008 0.928
Residuals 23 25.1 1.09
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NGly"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 257.17 128.58 125.433 4.25e-13 ***
Epoch 1 1.00 1.00 0.972 0.334
Residuals 23 23.58 1.03
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NLys"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 280.39 140.19 109.604 1.74e-12 ***
Epoch 1 0.01 0.01 0.006 0.938
Residuals 23 29.42 1.28
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NPhe"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 341.8 170.89 135.59 1.87e-13 ***
Epoch 1 1.0 1.03 0.82 0.375
Residuals 23 29.0 1.26
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NThr"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 225.60 112.80 49.881 4.32e-09 ***
Epoch 1 0.29 0.29 0.128 0.724
Residuals 23 52.01 2.26
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Seeing that season is not a significant predictor of \(\mathrm{\delta^{15}\text{N}_\text{AA}}\) values, we will rerun these analyses without differentiating when samples were collected.
# Next, seeing that season is not significant, we just use particle size as a predictor
# PERMANOVA on euclidean distances with Group as the sole predictor
adonis2(Data.sources[tracers] ~ Group, data=Data.sources, method="euclidean", by="terms")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999
adonis2(formula = Data.sources[tracers] ~ Group, data = Data.sources, method = "euclidean", by = "terms")
Df SumOfSqs R2 F Pr(>F)
Group 2 5707.7 0.91248 125.1 0.001 ***
Residual 24 547.5 0.08752
Total 26 6255.2 1.00000
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Per-tracer ANOVA (Group only) with Tukey HSD post-hoc comparisons,
## printed and plotted in a 4x3 grid.
Tukey <- list()
# Titles are drawn manually via mtext(), so the default main title is hidden
par(mar = c(2, 8, 2, 0.5), mfrow = c(4, 3), col.main = "white")
for (i in tracers) {
  AOV <- aov(Data.sources[[i]] ~ Group, data = Data.sources)
  print(i)
  print(summary(AOV))
  # Compute the Tukey HSD once and reuse it (the original called TukeyHSD()
  # twice per tracer and left two discarded expressions in the loop body).
  Tukey[[i]] <- TukeyHSD(AOV, conf.level = .95)
  plot(Tukey[[i]], las = 1, sub = i)
  mtext(i, side = 3)
}
[1] "d15NGlx"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 799.9 399.9 266.1 <2e-16 ***
Residuals 24 36.1 1.5
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NAla"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 829.1 414.5 256.9 <2e-16 ***
Residuals 24 38.7 1.6
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NAsx"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 530.7 265.36 214.1 4.99e-16 ***
Residuals 24 29.7 1.24
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NIle"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 561.4 280.68 100.9 2.09e-12 ***
Residuals 24 66.8 2.78
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NLeu"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 537.1 268.56 175.4 4.75e-15 ***
Residuals 24 36.7 1.53
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NPro"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 408.1 204.06 144 4.28e-14 ***
Residuals 24 34.0 1.42
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NVal"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 486.0 243 40.52 2.02e-08 ***
Residuals 24 143.9 6
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NSer"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 450.5 225.25 215 4.77e-16 ***
Residuals 24 25.1 1.05
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NGly"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 257.17 128.58 125.6 1.94e-13 ***
Residuals 24 24.57 1.02
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NLys"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 280.39 140.19 114.3 5.39e-13 ***
Residuals 24 29.43 1.23
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NPhe"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 341.8 170.89 136.6 7.68e-14 ***
Residuals 24 30.0 1.25
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "d15NThr"
Df Sum Sq Mean Sq F value Pr(>F)
Group 2 225.6 112.80 51.76 1.97e-09 ***
Residuals 24 52.3 2.18
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Now we need to assess which of these are correlated and thus may only provide redundant information. We will generate a correlation matrix, quantifying colinearity with Pearson’s correlation coefficients.
# Pairwise Pearson correlation matrix across all tracers
corel <- cor(Data.sources[tracers], method = "pearson")
kable(corel, format = "pipe")
| d15NGlx | d15NAla | d15NAsx | d15NIle | d15NLeu | d15NPro | d15NVal | d15NSer | d15NGly | d15NLys | d15NPhe | d15NThr | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| d15NGlx | 1.0000000 | 0.9633562 | 0.9886089 | 0.9808368 | 0.9765613 | 0.9652689 | 0.9070356 | 0.9551533 | 0.9446843 | 0.9315001 | 0.9491057 | 0.3700737 |
| d15NAla | 0.9633562 | 1.0000000 | 0.9683775 | 0.9529956 | 0.9568742 | 0.9548824 | 0.8724740 | 0.9770930 | 0.9645771 | 0.9355227 | 0.9521047 | 0.5062913 |
| d15NAsx | 0.9886089 | 0.9683775 | 1.0000000 | 0.9654164 | 0.9579344 | 0.9762420 | 0.9121958 | 0.9611684 | 0.9464814 | 0.9403546 | 0.9733671 | 0.4610596 |
| d15NIle | 0.9808368 | 0.9529956 | 0.9654164 | 1.0000000 | 0.9788854 | 0.9340866 | 0.9059659 | 0.9387214 | 0.9305482 | 0.8923038 | 0.9205900 | 0.3198700 |
| d15NLeu | 0.9765613 | 0.9568742 | 0.9579344 | 0.9788854 | 1.0000000 | 0.9491280 | 0.8781665 | 0.9360644 | 0.9506110 | 0.8890549 | 0.9064529 | 0.2885070 |
| d15NPro | 0.9652689 | 0.9548824 | 0.9762420 | 0.9340866 | 0.9491280 | 1.0000000 | 0.8859570 | 0.9462793 | 0.9169509 | 0.9310110 | 0.9581904 | 0.4481009 |
| d15NVal | 0.9070356 | 0.8724740 | 0.9121958 | 0.9059659 | 0.8781665 | 0.8859570 | 1.0000000 | 0.9055753 | 0.8520602 | 0.8857249 | 0.8968807 | 0.4602613 |
| d15NSer | 0.9551533 | 0.9770930 | 0.9611684 | 0.9387214 | 0.9360644 | 0.9462793 | 0.9055753 | 1.0000000 | 0.9342859 | 0.9518558 | 0.9607323 | 0.5365553 |
| d15NGly | 0.9446843 | 0.9645771 | 0.9464814 | 0.9305482 | 0.9506110 | 0.9169509 | 0.8520602 | 0.9342859 | 1.0000000 | 0.9144850 | 0.9134890 | 0.4538430 |
| d15NLys | 0.9315001 | 0.9355227 | 0.9403546 | 0.8923038 | 0.8890549 | 0.9310110 | 0.8857249 | 0.9518558 | 0.9144850 | 1.0000000 | 0.9649867 | 0.5893354 |
| d15NPhe | 0.9491057 | 0.9521047 | 0.9733671 | 0.9205900 | 0.9064529 | 0.9581904 | 0.8968807 | 0.9607323 | 0.9134890 | 0.9649867 | 1.0000000 | 0.5728253 |
| d15NThr | 0.3700737 | 0.5062913 | 0.4610596 | 0.3198700 | 0.2885070 | 0.4481009 | 0.4602613 | 0.5365553 | 0.4538430 | 0.5893354 | 0.5728253 | 1.0000000 |
# Mean absolute pairwise correlation (lower triangle, each pair counted once)
mapcc <- mean(abs(corel[lower.tri(corel)]))
#create pairs plot (scatter matrix with linear fits and Pearson r)
pairs.panels(Data.sources[tracers], xaxt="n", yaxt="n", lm=TRUE, smoother=FALSE, stars = FALSE, scale=FALSE, ellipses = FALSE, ci=TRUE, method = "pearson")
The mean absolute Pearson’s correlation coefficient for these data is 0.8570597.
Let’s also quickly calculate the effective dimensionality of the dataset.
# Estimate the effective dimensionality (ED) of the tracer suite from its
# correlation matrix
ED <- estimate.ED(Data.sources[c(tracers)])
[1] ED estimated from the correlation matrix; no disattenuation; no small-sample correction.
# Report the n1 effective-dimensionality estimate
ED$n1
[1] 1.66
The effective dimensionality of the dataset is 1.66.
Now we will do a PCA to produce a 2-D visualization of how these groups separate in this multivariate parameter space.
# Fit a PCA on the tracer columns (unit-variance scaled, no auto plotting).
# NOTE(review): the original comment mentioned adding Type as a supplemental
# qualitative variable, but no quali.sup argument is passed — confirm intent.
PCA.OSP = PCA(Data.sources[c(tracers)], scale.unit = TRUE, graph = FALSE)
# summary(PCA.lit)
# Plot component eigenvalues (scree plot) and print
fviz_eig(PCA.OSP, addlabels = FALSE)
# Plot PC1&2 and PC1&3 biplots, coloring individuals by Group and
# projecting the tracer variable vectors
fviz_pca_biplot(PCA.OSP, axes = c(1,2), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
fviz_pca_biplot(PCA.OSP, axes = c(1,3), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
# Generate interactive 3D plot of the PC1, PC2, and PC3 scores
PCA.results <- as.data.frame(PCA.OSP$ind$coord)
PCA.results$Type <- Data.sources$Group
plot_ly(data=PCA.results, x=~Dim.1, y=~Dim.2, z=~Dim.3,
type="scatter3d", mode="markers", color=PCA.results$Type,
marker = list(line = list(color = "1", width = 0.5)))
# Bar plots of variable contributions to the first three components
p1<-ggplot(aes(x=rownames(PCA.OSP$var$contrib),y=PCA.OSP$var$contrib[,1]),data=NULL)+
geom_col()+ylab("PC1")+ggtitle("PCA Loadings")+
theme(axis.text.x = element_blank(),axis.title.x = element_blank(),
plot.title=element_text(hjust=0.5))
p2<-ggplot(aes(x=rownames(PCA.OSP$var$contrib),y=PCA.OSP$var$contrib[,2]),data=NULL)+
geom_col()+ylab("PC2")+
theme(axis.text.x = element_blank(),axis.title.x = element_blank())
p3<-ggplot(aes(x=rownames(PCA.OSP$var$contrib),y=PCA.OSP$var$contrib[,3]),data=NULL)+
geom_col()+ylab("PC3")+
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust=1),
axis.title.x = element_blank())
ggarrange(p1,p2,p3,nrow=3,heights=c(1.2,1,1.5 ))
# Save this all-tracer PCA for the comparison figures later in the document
PCA.all <- PCA.OSP
Next we will carry out an LDA to see if that improves group separation.
# Define training data: sample group in column 1, tracer columns after it.
# (Original comment said "6 EAA's in columns 2-7" — stale; the current
# tracer suite is the 12 amino acid d15N values.)
data.train = Data.sources[c("Group",tracers)]
# Subsetting data to include Microalgae, Fungi, and Bacteria,
# and not macroalgae, plants, and seagrass
# data.train = subset(data.train, Type == "Microalgae" | Type == "Fungi" | Type == "Bacteria")
ntypes = nlevels(as.factor(data.train$Group))
# Fit LDA with leave-one-out cross validation and a flat prior over groups
LDA.test = lda(Group ~ . ,data = data.train, CV = TRUE,
prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# print model result
# LDA.train
# Confusion table: actual group (rows) vs LOO-CV predicted class (columns)
ct.prod.norm <- table(data.train$Group,
LDA.test$class)
# Total fraction of samples correctly classified = sum of the table diagonal
noquote(c('% successfully categorized: ', sum(diag(prop.table(ct.prod.norm))))) # fraction in [0,1]
[1] % successfully categorized: 0.962962962962963
# Refit the LDA on all available training data (no cross validation)
LDA.full = lda(Group ~ . ,data = data.train, CV = FALSE, prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# Print the fitted LDA (priors, group means, coefficients, trace)
LDA.full
Call:
lda(Group ~ ., data = data.train, CV = FALSE, prior = rep(1/ntypes,
ntypes))
Prior probabilities of groups:
Surface Large Small
0.25 0.25 0.25
Group means:
d15NGlx d15NAla d15NAsx d15NIle d15NLeu d15NPro d15NVal
Surface 2.065466 2.506723 1.117468 0.4546156 -0.445014 1.284815 1.080129
Large 11.684632 10.710886 7.986913 8.6826116 8.301254 7.630257 7.658631
Small 14.810559 15.644808 11.615904 11.0982092 9.779106 10.464619 11.125955
d15NSer d15NGly d15NLys d15NPhe d15NThr
Surface -3.460240 -1.435940 -2.079949 -1.793572 -1.666229
Large 2.102483 3.620269 1.966246 2.509898 -4.582757
Small 6.224073 5.848932 5.543239 6.609737 2.365049
Coefficients of linear discriminants:
LD1 LD2 LD3
d15NGlx 1.62520301 -0.61090276 -0.04124058
d15NAla 0.54643298 -0.67613137 0.91935973
d15NAsx -0.37756591 0.46811820 0.52708874
d15NIle -0.78530840 0.81345393 0.01697736
d15NLeu -0.17903611 -0.50794449 -0.51416226
d15NPro 0.32368444 -0.06607683 -0.27000541
d15NVal 0.03705049 -0.38977915 0.11144004
d15NSer 0.25449266 0.95072197 -0.69750294
d15NGly 0.31480716 0.28110414 -0.67243509
d15NLys -0.11084839 -0.05994609 0.05846311
d15NPhe -0.49079099 0.37333643 -0.13698729
d15NThr 0.01411409 0.59792649 0.25418698
Proportion of trace:
LD1 LD2 LD3
0.8334 0.1275 0.0391
# Save the all-tracer LDA for the comparison figures later in the document
LDA.all <- LDA.full
# Project the training data into LD space for plotting
pred.train = predict(LDA.full, data.train[-1])
class.train = data.frame('Type' = data.train$Group, pred.train$x)
# Global ggplot theme: light background, horizontal grid lines only
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95")))
# Biplot of LD1 vs LD2 (zooplankton overlay currently commented out)
plot.mix.LD12 = ggplot(data = class.train,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD2, y = LD1, shape = Predicted),color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
theme(legend.position = 'none')
# Biplot of LD1 vs LD3
plot.mix.LD13 = ggplot(data = class.train,
aes(x = LD3, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD3, y = LD1, shape = Predicted), color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
labs(shape = "Zooplankton") + labs(color = "Training Data")
# Arrange both biplots side by side
grid.arrange(ncol = 2, widths = c(1,1.4), plot.mix.LD12, plot.mix.LD13, top = 'Literature Producers') + theme(legend.position = 'top')
NULL
# Put LD1, LD2, and LD3 on interactive 3D axes
plot_ly(data=class.train, x=~LD1, y=~LD2, z=~LD3,
type="scatter3d", mode="markers", color=class.train$Type,
marker = list(line = list(color = "1", width = 0.5)))
# Bar plots of LDA loadings; LDA.all[[4]] is the discriminant coefficient
# ("scaling") matrix. (Original comment said "PCA loadings" — these are LDA.)
p1<-ggplot(aes(x=rownames(LDA.all[[4]]),y=LDA.all[[4]][,1]),data=NULL)+
geom_col()+ylab("LD1")+ggtitle("LDA Loadings")+
theme(axis.text.x = element_blank(),axis.title.x = element_blank(),
plot.title=element_text(hjust=0.5))
p2<-ggplot(aes(x=rownames(LDA.all[[4]]),y=LDA.all[[4]][,2]),data=NULL)+
geom_col()+ylab("LD2")+
theme(axis.text.x = element_blank(),axis.title.x = element_blank())
p3<-ggplot(aes(x=rownames(LDA.all[[4]]),y=LDA.all[[4]][,3]),data=NULL)+
geom_col()+ylab("LD3")+
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust=1),
axis.title.x = element_blank())
ggarrange(p1,p2,p3,nrow=3,heights=c(1.2,1,1.5 ))
Now we’ll run some of the same multivariate analyses but only looking at \(\mathrm{\delta^{15}\text{N}_\text{SAA}}\) values.
# Restrict the tracer suite to the source amino acids (SAAs)
tracers <- tracers_SAA
SDtracers <- SDtracers_SAA
include_d13C <- FALSE
# Reshape the source data to long format, one row per sample x amino acid.
# NOTE(review): the combined case below fires when include_d13C ==
# include_d15N, which is also TRUE when BOTH are FALSE — rbind would then
# fail on undefined objects. Harmless here since include_d15N is TRUE.
if (include_d13C == TRUE) {
sources.dC.long <-
melt(Data.sources, id.vars=c(variables), measure.vars = intersect(tracers_d13C, tracers),
variable.name="AA")
sources.dC.long$AA <- sub('....','',sources.dC.long$AA)
sources.dC.long$iso <- "d13C"
if (include_d15N == FALSE) {
sources.long <- sources.dC.long
}
}
if (include_d15N == TRUE) {
sources.dN.long <-
melt(Data.sources, id.vars=c(variables), measure.vars = intersect(tracers_d15N, tracers),
variable.name="AA")
# Strip the 4-character isotope prefix (e.g. "d15N") from the tracer names
sources.dN.long$AA <- sub('....','',sources.dN.long$AA)
sources.dN.long$iso <- "d15N"
if (include_d13C == FALSE) {
sources.long <- sources.dN.long
}
}
if (include_d13C == include_d15N) {
sources.long <- rbind(sources.dN.long,sources.dC.long)
}
# Order amino acids by the preferred reference order (allAA.ord, set earlier)
sources.long$AA <- factor(sources.long$AA, levels = allAA.ord)
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95"),
axis.text.x = element_text(angle = 60, vjust = 1.6, hjust=1.7),
axis.ticks.x = element_blank()))
# Dot plot of tracer values by amino acid, colored by source group
AA_plots <-
ggplot(sources.long, aes(x = AA, y = value, color = Group, group=Group))+
geom_point(alpha=0.6, position = position_dodge(width = 0.8))+
ylab(expression(delta^{15}*N*" or "*delta^{13}*C*" (\u2030)"))+
labs(color="Organic Matter Source", shape="Food Web Base", fill="Zooplankton Samples")+
facet_row(~iso, scales="free", space="free")+
geom_vline(xintercept = seq(0.5,length(sources.long$AA),1), color="grey75", lwd=0.3)
AA_plots
Let’s also quickly calculate the effective dimensionality of the dataset.
# Effective dimensionality of the SAA tracer suite
ED <- estimate.ED(Data.sources[c(tracers)])
[1] ED estimated from the correlation matrix; no disattenuation; no small-sample correction.
# Report the n1 effective-dimensionality estimate
ED$n1
[1] 1.25
The effective dimensionality of the dataset is 1.25.
And a PCA to produce a 2-D visualization of how these groups separate in this multivariate parameter space.
# Fit a PCA on the SAA tracer columns (unit-variance scaled)
PCA.OSP = PCA(Data.sources[c(tracers)], scale.unit = TRUE, graph = FALSE)
# summary(PCA.lit)
# Plot component eigenvalues (scree plot) and print
fviz_eig(PCA.OSP, addlabels = FALSE)
# Plot PC1&2 and PC1&3 biplots, coloring individuals by Group
fviz_pca_biplot(PCA.OSP, axes = c(1,2), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
fviz_pca_biplot(PCA.OSP, axes = c(1,3), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
# Generate interactive 3D plot of the PC1, PC2, and PC3 scores
PCA.results <- as.data.frame(PCA.OSP$ind$coord)
PCA.results$Type <- Data.sources$Group
plot_ly(data=PCA.results, x=~Dim.1, y=~Dim.2, z=~Dim.3,
type="scatter3d", mode="markers", color=PCA.results$Type,
marker = list(line = list(color = "1", width = 0.5)))
# Save the SAA PCA for the comparison figures later in the document
PCA.SAA <- PCA.OSP
Next we will carry out an LDA to see if that improves group separation.
# Define training data: sample group in column 1, SAA tracer columns after it
data.train = Data.sources[c("Group",tracers)]
# Subsetting data to include Microalgae, Fungi, and Bacteria,
# and not macroalgae, plants, and seagrass
# data.train = subset(data.train, Type == "Microalgae" | Type == "Fungi" | Type == "Bacteria")
ntypes = nlevels(as.factor(data.train$Group))
# Fit LDA with leave-one-out cross validation and a flat prior over groups
LDA.test = lda(Group ~ . ,data = data.train, CV = TRUE,
prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# print model result
# LDA.train
# Confusion table: actual group (rows) vs LOO-CV predicted class (columns)
ct.prod.norm <- table(data.train$Group,
LDA.test$class)
# Total fraction of samples correctly classified = sum of the table diagonal
noquote(c('% successfully categorized: ', sum(diag(prop.table(ct.prod.norm))))) # fraction in [0,1]
[1] % successfully categorized: 0.962962962962963
# Refit the LDA on all available training data (no cross validation)
LDA.full = lda(Group ~ . ,data = data.train, CV = FALSE, prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# Print the fitted LDA (priors, group means, coefficients, trace)
LDA.full
Call:
lda(Group ~ ., data = data.train, CV = FALSE, prior = rep(1/ntypes,
ntypes))
Prior probabilities of groups:
Surface Large Small
0.25 0.25 0.25
Group means:
d15NSer d15NGly d15NLys d15NPhe
Surface -3.460240 -1.435940 -2.079949 -1.793572
Large 2.102483 3.620269 1.966246 2.509898
Small 6.224073 5.848932 5.543239 6.609737
Coefficients of linear discriminants:
LD1 LD2 LD3
d15NSer -0.5631996 -0.1983251 0.8680862
d15NGly -0.4695364 0.8748368 -0.2071318
d15NLys -0.1260141 -0.2558877 0.1168813
d15NPhe -0.2005972 -0.3217850 -0.9178335
Proportion of trace:
LD1 LD2 LD3
0.9841 0.0115 0.0044
# Save the SAA LDA for the comparison figures later in the document
LDA.SAA <- LDA.full
# Project the training data into LD space for plotting
pred.train = predict(LDA.full, data.train[-1])
class.train = data.frame('Type' = data.train$Group, pred.train$x)
# Global ggplot theme: light background, horizontal grid lines only
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95")))
# Biplot of LD1 vs LD2 (zooplankton overlay currently commented out)
plot.mix.LD12 = ggplot(data = class.train,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD2, y = LD1, shape = Predicted),color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
theme(legend.position = 'none')
# Biplot of LD1 vs LD3
plot.mix.LD13 = ggplot(data = class.train,
aes(x = LD3, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD3, y = LD1, shape = Predicted), color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
labs(shape = "Zooplankton") + labs(color = "Training Data")
# Arrange both biplots side by side
grid.arrange(ncol = 2, widths = c(1,1.4), plot.mix.LD12, plot.mix.LD13, top = 'Literature Producers') + theme(legend.position = 'top')
NULL
# Put LD1, LD2, and LD3 on interactive 3D axes
plot_ly(data=class.train, x=~LD1, y=~LD2, z=~LD3,
type="scatter3d", mode="markers", color=class.train$Type,
marker = list(line = list(color = "1", width = 0.5)))
Now we’ll run some of the same multivariate analyses but looking at a specific selection of amino acid \(\mathrm{\delta^{15}\text{N}}\) values. This is going to be Phe, Lys, and Thr, since these are the tracers used in the model.
# Restrict the tracer suite to the model tracers (Lys, Phe, Thr)
tracers <- tracers_select
# NOTE(review): SDtracers_select is assumed to be defined earlier alongside
# tracers_select — confirm it exists in the setup chunk.
SDtracers <- SDtracers_select
include_d13C <- FALSE
# Reshape the source data to long format, one row per sample x amino acid.
# NOTE(review): the combined case below fires when include_d13C ==
# include_d15N, which is also TRUE when BOTH are FALSE — rbind would then
# fail on undefined objects. Harmless here since include_d15N is TRUE.
if (include_d13C == TRUE) {
sources.dC.long <-
melt(Data.sources, id.vars=c(variables), measure.vars = intersect(tracers_d13C, tracers),
variable.name="AA")
sources.dC.long$AA <- sub('....','',sources.dC.long$AA)
sources.dC.long$iso <- "d13C"
if (include_d15N == FALSE) {
sources.long <- sources.dC.long
}
}
if (include_d15N == TRUE) {
sources.dN.long <-
melt(Data.sources, id.vars=c(variables), measure.vars = intersect(tracers_d15N, tracers),
variable.name="AA")
# Strip the 4-character isotope prefix (e.g. "d15N") from the tracer names
sources.dN.long$AA <- sub('....','',sources.dN.long$AA)
sources.dN.long$iso <- "d15N"
if (include_d13C == FALSE) {
sources.long <- sources.dN.long
}
}
if (include_d13C == include_d15N) {
sources.long <- rbind(sources.dN.long,sources.dC.long)
}
# Order amino acids by the preferred reference order (allAA.ord, set earlier)
sources.long$AA <- factor(sources.long$AA, levels = allAA.ord)
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95"),
axis.text.x = element_text(angle = 60, vjust = 1.6, hjust=1.7),
axis.ticks.x = element_blank()))
# Dot plot of tracer values by amino acid, colored by source group
AA_plots <-
ggplot(sources.long, aes(x = AA, y = value, color = Group, group=Group))+
geom_point(alpha=0.6, position = position_dodge(width = 0.8))+
ylab(expression(delta^{15}*N*" or "*delta^{13}*C*" (\u2030)"))+
labs(color="Organic Matter Source", shape="Food Web Base", fill="Zooplankton Samples")+
facet_row(~iso, scales="free", space="free")+
geom_vline(xintercept = seq(0.5,length(sources.long$AA),1), color="grey75", lwd=0.3)
AA_plots
Let’s also quickly calculate the effective dimensionality of the dataset.
# Effective dimensionality of the select tracer suite
ED <- estimate.ED(Data.sources[c(tracers)])
[1] ED estimated from the correlation matrix; no disattenuation; no small-sample correction.
# Report the n1 effective-dimensionality estimate
ED$n1
[1] 1.69
The effective dimensionality of the dataset is 1.69.
PCA to produce a 2-D visualization of how these groups separate in this multivariate parameter space.
# Fit a PCA on the select tracer columns (unit-variance scaled)
PCA.OSP = PCA(Data.sources[c(tracers)], scale.unit = TRUE, graph = FALSE)
# summary(PCA.lit)
# Plot component eigenvalues (scree plot) and print
fviz_eig(PCA.OSP, addlabels = FALSE)
# Plot PC1&2 and PC1&3 biplots, coloring individuals by Group
fviz_pca_biplot(PCA.OSP, axes = c(1,2), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
fviz_pca_biplot(PCA.OSP, axes = c(1,3), col.ind = Data.sources$Group,
addlabels=FALSE, addEllipses = TRUE, col.var = 'grey50', repel=TRUE)
# Generate interactive 3D plot of the PC1, PC2, and PC3 scores
PCA.results <- as.data.frame(PCA.OSP$ind$coord)
PCA.results$Type <- Data.sources$Group
plot_ly(data=PCA.results, x=~Dim.1, y=~Dim.2, z=~Dim.3,
type="scatter3d", mode="markers", color=PCA.results$Type,
marker = list(line = list(color = "1", width = 0.5)))
# Save the select-tracer PCA for the comparison figures later
PCA.select <- PCA.OSP
Next we will carry out an LDA to see if that improves group separation.
# Define training data: sample group in column 1, select tracer columns after
data.train = Data.sources[c("Group",tracers)]
# Subsetting data to include Microalgae, Fungi, and Bacteria,
# and not macroalgae, plants, and seagrass
# data.train = subset(data.train, Type == "Microalgae" | Type == "Fungi" | Type == "Bacteria")
ntypes = nlevels(as.factor(data.train$Group))
# Fit LDA with leave-one-out cross validation and a flat prior over groups
LDA.test = lda(Group ~ . ,data = data.train, CV = TRUE,
prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# print model result
# LDA.train
# Confusion table: actual group (rows) vs LOO-CV predicted class (columns)
ct.prod.norm <- table(data.train$Group,
LDA.test$class)
# Total fraction of samples correctly classified = sum of the table diagonal
noquote(c('% successfully categorized: ', sum(diag(prop.table(ct.prod.norm))))) # fraction in [0,1]
[1] % successfully categorized: 0.962962962962963
# Refit the LDA on all available training data (no cross validation)
LDA.full = lda(Group ~ . ,data = data.train, CV = FALSE, prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): group Submicron is empty
# Print the fitted LDA (priors, group means, coefficients, trace)
LDA.full
Call:
lda(Group ~ ., data = data.train, CV = FALSE, prior = rep(1/ntypes,
ntypes))
Prior probabilities of groups:
Surface Large Small
0.25 0.25 0.25
Group means:
d15NLys d15NPhe d15NThr
Surface -2.079949 -1.793572 -1.666229
Large 1.966246 2.509898 -4.582757
Small 5.543239 6.609737 2.365049
Coefficients of linear discriminants:
LD1 LD2 LD3
d15NLys 0.3040489 0.50299556 1.04672643
d15NPhe 0.6237580 -0.09112708 -0.95327460
d15NThr 0.1663696 -0.69689051 0.01889849
Proportion of trace:
LD1 LD2 LD3
0.7959 0.2031 0.0010
# Save the select-tracer LDA for the comparison figures later
LDA.select <- LDA.full
# Project the training data into LD space for plotting
pred.train = predict(LDA.full, data.train[-1])
class.train = data.frame('Type' = data.train$Group, pred.train$x)
# Global ggplot theme: light background, horizontal grid lines only
theme_set(theme_light()+
theme(panel.grid.major.x = element_line(colour = NA),
panel.grid.major.y = element_line(colour = "grey95")))
# Biplot of LD1 vs LD2 (zooplankton overlay currently commented out)
plot.mix.LD12 = ggplot(data = class.train,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD2, y = LD1, shape = Predicted),color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
theme(legend.position = 'none')
# Biplot of LD1 vs LD3
plot.mix.LD13 = ggplot(data = class.train,
aes(x = LD3, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
# geom_point(data = class.zoops,
# aes(x = LD3, y = LD1, shape = Predicted), color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
labs(shape = "Zooplankton") + labs(color = "Training Data")
# Arrange both biplots side by side
grid.arrange(ncol = 2, widths = c(1,1.4), plot.mix.LD12, plot.mix.LD13, top = 'Literature Producers') + theme(legend.position = 'top')
NULL
# Put LD1, LD2, and LD3 on interactive 3D axes
plot_ly(data=class.train, x=~LD1, y=~LD2, z=~LD3,
type="scatter3d", mode="markers", color=class.train$Type,
marker = list(line = list(color = "1", width = 0.5)))
Next we’re going to generate some plots summarizing how our explanatory power changes as a function of the tracers included. To do this we’ll plot results from PCAs and LDAs, paired with the variance explained by each dimension.
# Table of variance explained by the first three PCA dimensions for each
# tracer suite (rows: 1 = SAAs, 2 = select, 3 = all). eig[,2] is used
# without *100 below, i.e. presumably already in percent — consistent with
# the LDA table, which does multiply by 100.
var.exp.PCA <-
data.frame("AAs" =
factor(c("SAAs","Select d15N","All d15N"),
levels = c("SAAs","Select d15N","All d15N")))
var.exp.PCA[1,c("Dim1","Dim2","Dim3")] <- PCA.SAA$eig[1:3,2]
var.exp.PCA[2,c("Dim1","Dim2","Dim3")] <- PCA.select$eig[1:3,2]
var.exp.PCA[3,c("Dim1","Dim2","Dim3")] <- PCA.all$eig[1:3,2]
var.exp.PCA.long <-
melt(var.exp.PCA,id.vars = "AAs", value.name = "Variance_Explained", variable.name = "Dimension")
# Stacked bar chart of relative variance explained per tracer suite
ggplot(data = var.exp.PCA.long, aes(x=AAs, y=Variance_Explained,
fill = Dimension, group = Dimension))+
geom_col(position = "fill")+
ggtitle("Relative Variance Explained by 1st 3 PCA Dimensions")+
xlab("Tracer Suite") + ylab("% of Variance Explained")+
scale_fill_manual(values=c("steelblue1","royalblue1","royalblue4"))+
scale_color_manual(values=c("steelblue1","royalblue1","royalblue4"))
# Collect individual coordinates from each PCA into data frames for plotting,
# tagging each row with its sample group.
PCA.results.SAA <- as.data.frame(PCA.SAA$ind$coord)
PCA.results.SAA$Type <- Data.sources$Group
# BUG FIX: the original had a misplaced parenthesis,
#   as.data.frame(cbind(PCA.select$ind$coord), PCA.select$eig)
# which closed cbind() early and passed the eigenvalue table to
# as.data.frame() as its `row.names` argument. Build the frame the same
# way as the other two instead.
PCA.results.select <- as.data.frame(PCA.select$ind$coord)
PCA.results.select$Type <- Data.sources$Group
PCA.results.all <- as.data.frame(PCA.all$ind$coord)
PCA.results.all$Type <- Data.sources$Group
# PC1/PC2 biplots per tracer suite. var.exp.PCA columns are
# (AAs, Dim1, Dim2, Dim3), so [i,2] = Dim1 % and [i,3] = Dim2 %.
biplot.SAA <-
ggplot(data = PCA.results.SAA,
aes(y = Dim.2, x = Dim.1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
ggtitle(expression("SAA "*delta^{15}*"N"))+
xlab(paste("PC1(",round(var.exp.PCA[1,2],0),"%)",sep = ""))+
ylab(paste("PC2(",round(var.exp.PCA[1,3],0),"%)",sep = ""))+
theme_light()+theme(text=element_text(family="serif"))+
theme(legend.position = 'none')
# Same biplot for the select tracer suite (row 2 of var.exp.PCA)
biplot.select <-
ggplot(data = PCA.results.select,
aes(y = Dim.2, x = Dim.1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
ggtitle(expression("Select "*delta^{15}*"N"))+
xlab(paste("PC1(",round(var.exp.PCA[2,2],0),"%)",sep = ""))+
ylab(paste("PC2(",round(var.exp.PCA[2,3],0),"%)",sep = ""))+
theme_light()+theme(text=element_text(family="serif"))+
theme(legend.position = 'none')
# PC1/PC2 biplot for the full tracer suite.
# BUG FIX: the PC2 axis label indexed var.exp.PCA[4,3], but the table has
# only three rows (1 = SAAs, 2 = select, 3 = all) — row 4 is NA. Row 3
# holds the all-tracer results, so use [3,3].
biplot.all1 <-
  ggplot(data = PCA.results.all,
         aes(y = Dim.2, x = Dim.1, color = Type), size = 2) +
  geom_point(alpha = 0.6) +
  stat_ellipse(type = 't', alpha = 0.6)+
  ggtitle(expression("All "*delta^{15}*"N, "*delta^{13}*"C"))+
  xlab(NULL)+#paste("PC1(",round(var.exp.PCA[3,2],0),"%)",sep = ""))+
  ylab(paste("PC2(",round(var.exp.PCA[3,3],0),"%)",sep = ""))+
  theme_light()+theme(text=element_text(family="serif"))+
  theme(legend.position = 'none')
# Arrange the three PCA biplots side by side
ggarrange(biplot.SAA,
          biplot.select,
          biplot.all1,
          ncol = 3)
# Table of the proportion of between-group variance captured by each LD,
# computed from the singular values of each LDA fit
# (rows: 1 = SAAs, 2 = select, 3 = all).
var.exp.LDA <-
data.frame("AAs" =
factor(c("SAAs","Select d15N","All d15N"),
levels = c("SAAs","Select d15N","All d15N")))
var.exp.LDA[1,c("Dim1","Dim2","Dim3")] <- LDA.SAA$svd^2/sum(LDA.SAA$svd^2)
var.exp.LDA[2,c("Dim1","Dim2","Dim3")] <- LDA.select$svd^2/sum(LDA.select$svd^2)
var.exp.LDA[3,c("Dim1","Dim2","Dim3")] <- LDA.all$svd^2/sum(LDA.all$svd^2)
var.exp.LDA.long <-
melt(var.exp.LDA,id.vars = "AAs", value.name = "Variance_Explained", variable.name = "Dimension")
# Stacked bar chart of LD variance proportions per tracer suite
ggplot(data = var.exp.LDA.long, aes(x=AAs, y=Variance_Explained,
fill = Dimension, group = Dimension))+
geom_col()+
ggtitle("Variance Explained by LDA Dimensions")+
xlab("Tracer Suite") + ylab("% of Variance Explained")+
scale_fill_manual(values=c("steelblue1","royalblue1","royalblue4"))+
scale_color_manual(values=c("steelblue1","royalblue1","royalblue4"))
# Project the training data into LD space for each of the three LDA fits
data.train = Data.sources[c("Group",tracers_SAA)]
pred.train = predict(LDA.SAA, data.train[-1])
class.train.SAA = data.frame('Type' = data.train$Group, pred.train$x)
data.train = Data.sources[c("Group",tracers_select)]
pred.train = predict(LDA.select, data.train[-1])
class.train.select = data.frame('Type' = data.train$Group, pred.train$x)
data.train = Data.sources[c("Group",tracers_all)]
pred.train = predict(LDA.all, data.train[-1])
class.train.all= data.frame('Type' = data.train$Group, pred.train$x)
# LD1/LD2 biplots; var.exp.LDA columns are (AAs, Dim1, Dim2, Dim3), so
# [i,2] = LD1 proportion and [i,3] = LD2 proportion (x100 for percent).
biplot.SAA <-
ggplot(data = class.train.SAA,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
theme(legend.position = 'none')+
ggtitle("SAA d15N")+
ylab(paste("LD1(",round(var.exp.LDA[1,2]*100,0),"%)",sep = ""))+
xlab(paste("LD2(",round(var.exp.LDA[1,3]*100,0),"%)",sep = ""))
# Same biplot for the select tracer suite (row 2 of var.exp.LDA)
biplot.select <-
ggplot(data = class.train.select,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.6) +
stat_ellipse(type = 't', alpha = 0.6)+
theme(legend.position = 'none')+
ggtitle("Select d15N")+
ylab(paste("LD1(",round(var.exp.LDA[2,2]*100,0),"%)",sep = ""))+
xlab(paste("LD2(",round(var.exp.LDA[2,3]*100,0),"%)",sep = ""))
# LD1/LD2 biplot of training data for the full tracer suite.
# BUG FIX: the axis labels indexed var.exp.LDA[4,2] and [4,3], but the
# table has only three rows (1 = SAAs, 2 = select, 3 = all) — row 4 is NA.
# Row 3 holds the all-tracer results, so use [3,2] and [3,3].
biplot.all <-
  ggplot(data = class.train.all,
         aes(x = LD2, y = LD1, color = Type), size = 2) +
  geom_point(alpha = 0.6) +
  stat_ellipse(type = 't', alpha = 0.6)+
  theme(legend.position = 'none')+
  ggtitle("All d15N, d13C")+
  ylab(paste("LD1(",round(var.exp.LDA[3,2]*100,0),"%)",sep = ""))+
  xlab(paste("LD2(",round(var.exp.LDA[3,3]*100,0),"%)",sep = ""))
# Arrange the three LDA biplots side by side
ggarrange(biplot.SAA,
          biplot.select,
          biplot.all, ncol = 3)
Making a plot for the paper.
# Publication figure: PC1/PC2 biplot for the select tracer suite with
# variable-loading arrows and plotmath tracer labels.
eigens <- data.frame(PCA.select$var$coord)
eigens$AA <- row.names(eigens)
# Plotmath label strings for the three tracers, parsed by geom_text() below
AA.custom <- c(
  as.character(expression(delta^{15}*"N"[Lys])),
  as.character(expression(delta^{15}*"N"[Phe])),
  as.character(expression(delta^{15}*"N"[Thr]))
)
biplot.select <-
  ggplot(data = PCA.results.select,
         aes(y = Dim.2, x = Dim.1, color = Type), size = 2) +
  geom_point(aes(shape = Type), alpha = 1, size=2) +
  # FIX: use `linewidth` for line widths — `size` on lines is deprecated
  # since ggplot2 3.4.0 (the version in use, per the rendered warning),
  # which silences the deprecation message without changing the plot.
  stat_ellipse(type = 't', alpha = 0.6, linewidth=0.5)+
  # Variable loadings scaled x2 for visibility
  geom_segment(data = eigens,
               aes(x=0, xend = Dim.1*2,
                   y=0, yend =Dim.2*2),
               color = "grey50", linewidth=0.5,
               arrow = arrow(length = unit(0.2, "cm")))+
  # Manual per-label offsets keep the text clear of the arrowheads
  geom_text(data = eigens,
            aes(x = Dim.1*2 + c(0.7,0.3,0.2),
                y = Dim.2*2 + c(0.15,-0.3,0.2),
                label = AA.custom),
            color = "grey50", parse = TRUE)+
  labs(color = "Organic Matter Source", shape = "Organic Matter Source")+
  # var.exp.PCA row 2 = select suite; columns 2,3 = Dim1, Dim2 % variance
  xlab(paste("PC1(",round(var.exp.PCA[2,2],0),"%)",sep = ""))+
  ylab(paste("PC2(",round(var.exp.PCA[2,3],0),"%)",sep = ""))+
  theme_light()+theme(text=element_text(family="serif"))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
# Print the publication biplot
biplot.select